hilo <- hbb_wku_h_xts
hilo <- data.frame(date=index(hilo), coredata(hilo))
hilo <- hilo[529:54816,]
hilo
# which(hilo$date=="2010-10-23 00:00:00") = index 529
# which(hilo$date=="2016-12-31 23:00:00") = index 54816
length(hilo[,1]) # we are left with 54288 lines of data
[1] 54288
54816 - length(hilo[,1]) # we lost 528 values
[1] 528
# removing columns that we are not using
hilo$date.2 <- NULL # another date column
hilo$date.1 <- NULL # another date column
hilo$BGARFU <- NULL # ?
hilo$cfs <- NULL
hilo$DOmgL <- NULL # dissolved oxygen
#hilo$Doper <- NULL # dissolved oxygen
hilo$PAR1 <- NULL # ?
hilo$pH <- NULL # pH
hilo$NTU <- NULL # a different measurement for turbitity
hilo$DOper10 <- NULL # dissolved oxygen
# colnames(hilo) <- c("Date", "cfs", "RiverFlow-cumec", "LogRiverFlow-cumec", "Chlorophyll-RFU", "Salinity-PPT", "Temp-C", "chlorophyll-calibrator", "Turbidity-NTU")
# does not work ???
hilo
====================================================
length(hilo$logcms[which(is.na(hilo$logcms)==TRUE)]) # 12 NAs
[1] 12
which(is.na(hilo$logcms)==TRUE)
[1] 50509 50510 50511 50512 50513 50514 50515 50516 50517 50518 50519 50520
RiverFlow <- ggplot(hilo, aes(x = date, y = as.numeric(cms))) +
geom_line()
print(RiverFlow + ggtitle("River Flow")+labs(x="Time", y = "River Flow - cubic meters per second"))
length(hilo$ChlRFU[which(is.na(hilo$ChlRFU)==TRUE)]) # 20464 NAs
which(as.numeric(hilo$ChlRFU)==max(as.numeric(na.omit(hilo$ChlRFU)))) # 15.3 max
# CHL tells us where in the data set this happened
hilo[38974,]
CHL <- ggplot(hilo, aes(x = date, y = as.numeric(ChlRFU))) +
geom_line()
print(CHL + ggtitle("Chlorophyll ")+labs(x="Time", y = "Chlorophyll - relative fluorescence units (RFU)"))
length(hilo$Corr.NTU[which(is.na(hilo$Corr.NTU)==TRUE)]) #15012 NAs
which(as.numeric(hilo$Corr.NTU)==max(as.numeric(na.omit(hilo$Corr.NTU)))) # 88.4
# tells us where in the data set this happened
hilo[33243,]
TURB <- ggplot(hilo,aes(x = date, y = as.numeric(Corr.NTU))) +
geom_line()
print(TURB + ggtitle("Turbidity ")+labs(x="Time", y = "Turbidity - Nephelometric Turbidity Units (NTU)"))
length(hilo$saltppt[which(is.na(hilo$saltppt)==TRUE)]) #11330 NAs
SALT <- ggplot(hilo, aes(x = date, y = as.numeric(saltppt))) +
geom_line()
print(SALT + ggtitle("Salinity")+labs(x="Time", y = "Salinity - unit parts per thousand (PPT)"))
======================================================== # Histograms FULL DATA SET ## River Flow Histogram
hist(as.numeric(hilo$logcms), main = "Histogram of Log River Flow", xlab = "Log River Flow")
# this looks okay
# VERY skewed
hist(as.numeric(hilo$ChlRFU), main = "Histogram of Chlorophyll", xlab = "Chlorophyll - relative fluorescence units (RFU)")
# this looks better
hist(log(as.numeric(hilo$ChlRFU)), main = "Histogram of Log Chlorophyll", xlab = "Chlorophyll")
# not sure what happens to units when taking the log
# VERY skewed
hist(as.numeric(hilo$Corr.NTU), main = "Histogram of Turbidity", xlab = "Turbidity - Nephelometric Turbidity Units (NTU)")
# this looks better
hist(log(as.numeric(hilo$Corr.NTU)), main = "Histogram of Log Turbidity", xlab = "Turbidity")
# not sure what happens to units when taking the log
# skewed
hist(as.numeric(hilo$saltppt), main = "Histogram of Salinity", xlab = "Salinity - unit parts per thousand (PPT)")
# this is worst!
hist(log(as.numeric(hilo$saltppt)), main = "Histogram of Log Salinity", xlab = "Salinity")
# not sure what happens to units when taking the log
========================================================= # NEW DATA SET-Modified Data 2013-2015
# start date: 2013-01-01 00:00:00
# end date: 2015-12-31 23:00:00
hilomodified <- hilo[19225:45504,]
length(hilo[,1])-length(hilomodified[,1])
[1] 28008
# lost 28008 entries of data
length(hilomodified[,1])-528 # we are left with 25752 entries of data
[1] 25752
head(hilomodified)
tail(hilomodified)
NA
library(mosaic)
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'mosaic':
method from
fortify.SpatialPolygonsDataFrame ggplot2
The 'mosaic' package masks several functions from core packages in order to add
additional features. The original behavior of these functions should not be affected by this.
Attaching package: ‘mosaic’
The following object is masked from ‘package:Matrix’:
mean
The following object is masked from ‘package:scales’:
rescale
The following objects are masked from ‘package:dplyr’:
count, do, tally
The following object is masked from ‘package:ggplot2’:
stat
The following objects are masked from ‘package:stats’:
binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test, quantile,
sd, t.test, var
The following objects are masked from ‘package:base’:
max, mean, min, prod, range, sample, sum
favstats(hilomodified$cms)
Auto-converting character to numeric.
favstats(hilomodified$ChlRFU)
favstats(hilomodified$Corr.NTU)
favstats(hilomodified$saltppt)
favstats(hilomodified$TempC)
favstats(hilomodified$Doper)
===================================================== # MODIFIED DATA SET 2013-2015 # Descriptives: Plots
length(hilomodified$logcms[which(is.na(hilomodified$logcms)==TRUE)]) # No NAs, YAY!
[1] 0
which(is.na(hilomodified$logcms)==TRUE)
integer(0)
RiverFlowMod <- ggplot(hilomodified, aes(x = date, y = as.numeric(cms))) +
geom_line()
print(RiverFlowMod + ggtitle("River Flow")+labs(x="Time", y = "River Flow - cubic meters per second"))
sum(is.na(hilomodified$ChlRFU)==TRUE) # 1884 NAs
[1] 1884
#which(is.na(hilomodified$ChlRFU)==TRUE)
# max(as.numeric(na.omit(hilo$ChlRFU))) # 15.3
which(as.numeric(hilomodified$ChlRFU)==15.3)
[1] 19750
hilo[38974,]
CHLMod <- ggplot(hilomodified, aes(x = date, y = as.numeric(ChlRFU))) +
geom_line()
print(CHLMod + ggtitle("Chlorophyll ")+labs(x="Time", y = "Chlorophyll - relative fluorescence units (RFU)"))
length(hilomodified$Corr.NTU[which(is.na(hilomodified$Corr.NTU)==TRUE)]) # 2704 NAs
# max(as.numeric(na.omit(hilo$Corr.NTU))) # 88.4
which(as.numeric(hilo$Corr.NTU)==88.4)
hilo[33243,]
TURBMod <- ggplot(hilomodified, aes(x = date, y = as.numeric(Corr.NTU))) +
geom_line()
print(TURBMod + ggtitle("Turbidity ")+labs(x="Time", y = "Turbidity - Nephelometric Turbidity Units (NTU)"))
length(hilomodified$saltppt[which(is.na(hilomodified$saltppt)==TRUE)]) # 2267 NAs
SALTMod <- ggplot(hilomodified, aes(x = date, y = as.numeric(saltppt))) +
geom_line()
print(SALTMod + ggtitle("Salinity")+labs(x="Time", y = "Salinity - unit parts per thousand (PPT)"))
length(hilomodified$TempC[which(is.na(hilomodified$TempC)==TRUE)]) # 2267 NAs
TempMod <- ggplot(hilomodified, aes(x = date, y = as.numeric(TempC))) +
geom_line()
print(TempMod + ggtitle("Temperature")+labs(x="Time", y = "Temperature - Celsius"))
length(hilomodified$Doper[which(is.na(hilomodified$Doper)==TRUE)]) # 2267 NAs
TempMod <- ggplot(hilomodified, aes(x = date, y = as.numeric(Doper))) +
geom_line()
print(TempMod + ggtitle("Dissolved Oxygen")+labs(x="Time", y = "Dissolved Oxygen in percent of saturation"))
=================================================== # Histograms MODIFIED
hist(as.numeric(hilomodified$cms), main = "Histogram of Log River Flow", xlab = "Log River Flow", breaks =90, xlim = c(0,100))
# this looks okay
# VERY skewed
hist(as.numeric(hilomodified$ChlRFU), main = "Histogram of Chlorophyll", xlab = "Chlorophyll - relative fluorescence units (RFU)")
# this looks better
hist(log(as.numeric(hilomodified$ChlRFU)), main = "Histogram of Log Chlorophyll", xlab = "Chlorophyll")
# not sure what happens to units when taking the log
# VERY skewed
hist(as.numeric(hilomodified$Corr.NTU), main = "Histogram of Turbidity", xlab = "Turbidity - Nephelometric Turbidity Units (NTU)")
# this looks better
hist(log(as.numeric(hilomodified$Corr.NTU)), main = "Histogram of Log Turbidity", xlab = "Turbidity")
# not sure what happens to units when taking the log
# skewed
hist(as.numeric(hilomodified$saltppt), main = "Histogram of Salinity", xlab = "Salinity - unit parts per thousand (PPT)")
# this is worst!
hist(log(as.numeric(hilomodified$saltppt)), main = "Histogram of Log Salinity", xlab = "Salinity")
# not sure what happens to units when taking the log
=============================================================
It is hard to see what is going on
AllYears <- ggplot(hilomodified, aes(x = date, y = as.numeric(logcms))) +
geom_line()+
geom_line(aes(y = as.numeric(saltppt)), color = "darkred") +
geom_line(aes(y = as.numeric(Corr.NTU)), color="darkgreen") +
geom_line(aes(y=as.numeric(ChlRFU)),color="blue")
print(AllYears + ggtitle("Hilo Bay")+labs(x="Time", y = "River Flow - cubic meters per second"))
============================================================== # Descriptives by Storm We are picking one storm from each year. We can indicate a storm has occurred by the extreme events in the river flow data. We will not use the log (which is logbase10) in order to see the extreme events When salinity is below 35 this also indicates a storm has occurred.
We will break the data set by year to find the most extreme event for each year.
# max(as.numeric(hilo2013$logcms)) # this is log base 10
# This is NOT the natural log
max(as.numeric(hilo2013$cms)) # 207.186
which(as.numeric(hilo2013$cms) == max(as.numeric(hilo2013$cms)))
hilo2013[3550,]
# all values for this storm are NA
plot2013 <- ggplot(hilo2013, aes(x = date, y = as.numeric(cms))) +
geom_line(color="black")+
geom_line(aes(y = as.numeric(saltppt)), color = "darkred") +
geom_line(aes(y = as.numeric(Corr.NTU)), color="darkgreen") +
geom_line(aes(y=as.numeric(ChlRFU)),color="blue")
print(plot2013 + ggtitle("2013")+labs(x="Time", y = "River Flow - cubic meters per second"))
# R2013.1 <- ggplot(hilo2013[1:(length(hilo2013[,1])/2),], aes(x = date, y = as.numeric(cms))) +
# geom_line(color="black")+
# geom_line(aes(y = as.numeric(saltppt)), color = "darkred") +
# geom_line(aes(y = as.numeric(Corr.NTU)), color="darkgreen") +
# geom_line(aes(y=as.numeric(ChlRFU)),color="blue")
# print(R2013.1 + ggtitle("River Flow")+labs(x="Time", y = "River Flow - cubic meters per second"))
# R2013.1+ylim(0,40)
#
#
# R2013.2 <- ggplot(hilo2013[(length(hilo2013[,1])/2):length(hilo2013[,1]),], aes(x = date, y = as.numeric(cms))) +
# geom_line(color="black")+
# geom_line(aes(y = as.numeric(saltppt)), color = "darkred") +
# geom_line(aes(y = as.numeric(Corr.NTU)), color="darkgreen") +
# geom_line(aes(y=as.numeric(ChlRFU)),color="blue")
# print(R2013.2 + ggtitle("River Flow")+labs(x="Time", y = "River Flow - cubic meters per second"))
# R2013.2+ylim(0,40)
# Max river flow in the overall data set
max(as.numeric(hilo2014$cms))
max(as.numeric(hilomodified$cms))
which(as.numeric(hilo2014$cms) == max(as.numeric(hilo2014$cms)))
hilo2014[5263,]
R2014 <- ggplot(hilo2014, aes(x = date, y = as.numeric(logcms))) +
geom_line(color="black")+
geom_line(aes(y = as.numeric(saltppt)), color = "darkred") +
geom_line(aes(y = as.numeric(Corr.NTU)), color="darkgreen") +
geom_line(aes(y=as.numeric(ChlRFU)),color="blue")
print(R2014 + ggtitle("2014")+labs(x="Time", y = "River Flow - cubic meters per second"))
max(as.numeric(hilo2015$cms))
which(as.numeric(hilo2015$cms) == max(as.numeric(hilo2015$cms)))
hilo2015[6475,]
R2015 <- ggplot(hilo2015, aes(x = date, y = as.numeric(logcms))) +
geom_line(color="black")+
geom_line(aes(y = as.numeric(saltppt)), color = "darkred") +
geom_line(aes(y = as.numeric(Corr.NTU)), color="darkgreen") +
geom_line(aes(y=as.numeric(ChlRFU)),color="blue")
print(R2015 + ggtitle("2015")+labs(x="Time", y = "River Flow - cubic meters per second"))
outcomes <- as.numeric(hilomodified$saltppt)<25
index.lt25 <- which(outcomes==TRUE)
poss.storms <- hilomodified[index.lt25,]
poss.storms
outcomes.35 <- as.numeric(hilomodified$saltppt)<35
index.lt35 <- which(outcomes==TRUE)
index.lt25
poss.storms35 <- hilomodified[index.lt25,]
poss.storms35
hilomodified$date[16]
storm.1.7.13 <- ggplot(hilomodified[145:168,], aes(x = date, y = as.numeric(cms))) +
geom_line(color="black")+
geom_line(aes(y = as.numeric(saltppt)), color = "lightsteelblue1") +
geom_line(aes(y = as.numeric(Corr.NTU)), color="grey69") +
geom_line(aes(y=as.numeric(ChlRFU)),color="khaki")
print(storm.1.7.13 + ggtitle("Storm 1/7/13")+labs(x="Time"))
RiverFlow1 <- ggplot(hilomodified[1:100,], aes(x = date, y = as.numeric(cms))) +
geom_line()
print(RiverFlow + ggtitle("River Flow")+labs(x="Time", y = "River Flow - cubic meters per second"))
=======
getwd()
# write.csv(hilomodified,file="HiloBayNEW13to15.csv", row.names = FALSE)
# any value greater than or equal to 10 cms indicates a storm
pos.neg <- as.numeric(hilomodified$cms) - 10
start <- vector()
end <- vector()
for(i in 1:length(pos.neg)){
if(isTRUE(pos.neg[i] < 0 && pos.neg[i+1] > 0)){
# if goes from + to - then this is the beginning of a storm
start[i] <- i
}else if(isTRUE(pos.neg[i] > 0 && pos.neg[i+1] < 0)){
# if goes from - to + then this is the end of the storm
end[i] <- i
}
}
start<-start[!is.na(start)] # removing NA
start
[1] 141 212 891 1097 1106 3480 3499 3545 5309 7537 8713 8729 10388 10390
[15] 10411 10727 10737 10843 10917 11216 11340 11508 11646 12581 13406 13458 13551 13655
[29] 13866 14010 15233 15287 15722 16054 16727 19777 19802 19920 20066 20386 20622 20662
[43] 21143 21163 22249 22656 22762 22911 23008 23015 23060 23116 23201 23395 23640 23703
[57] 23740 23757 23774 23807 23947 24141 24312 24437 24938 25005 25246 25474 25492 25972
[71] 26107 26187
length(start) # make sure both start and end are the same length
[1] 72
end<-end[!is.na(end)]
end
[1] 162 226 903 1102 1398 3495 3500 3572 5338 7547 8718 8765 10389 10404
[15] 10412 10732 10752 10845 11044 11232 11346 11525 11695 12596 13445 13514 13648 13670
[29] 13888 14111 15235 15288 15750 16148 16749 19790 19811 19938 20087 20394 20650 20719
[43] 21153 21169 22257 22675 22771 22972 23013 23020 23070 23130 23215 23403 23670 23719
[57] 23742 23761 23781 23899 24082 24168 24325 24510 24945 25233 25328 25482 25499 26093
[71] 26117 26214
length(end)
[1] 72
library(ggplot2)
for(i in 1:length(start)){
# the point start[i] is the beginning of a storm
# the point end[i] is the end of the same storm
# we are adding 24 hours to each start and end time
p <- ggplot(hilomodified[(start[i]-24):(end[i]+24),],aes(x = date, y = as.numeric(cms))) +
geom_line(color="black")
p <- p + geom_line(aes(y = range01(as.numeric(Corr.NTU))), color="green")
p <- p + geom_line(aes(y= range01(as.numeric(ChlRFU))),color="blue")
p <- p + scale_y_continuous(name = "First Axis", sec.axis = sec_axis(trans = ~.*.05, name="Second Axis")
)
# geom_line(aes(y = range01(as.numeric(Corr.NTU))), color="green") +
# geom_line(aes(y= range01(as.numeric(ChlRFU))),color="blue") +
# geom_line(aes(y = range01(as.numeric(saltppt))),color="red") +
# geom_line(aes(y = range01(as.numeric(TempC))),color="purple") +
# geom_line(aes(y = range01(as.numeric(Doper))),color="orange")+
p <- p + ggtitle("Storms")+labs(x="Time")
print(p)
}
range01 <- function(x, na.rm = TRUE) {
# version 1.1 (9 Aug 2013)
(x - min(x, na.rm = na.rm)) / (max(x, na.rm = na.rm) - min(x, na.rm = na.rm))
}
for(i in 1:length(start)){
index <- c((start[i]-24):(end[i]+24))
date <- c(hilomodified$date[index])
enddate <- hilomodified$date[end[i]+24]
endcms <- as.numeric(hilomodified$cms[end[i]+24])
endchl <- range01(as.numeric(hilomodified$cms[end[i]+24]))
endturb <- as.numeric(hilomodified$cms[end[i]+24])
endsalt <- as.numeric(hilomodified$cms[end[i]+24])
endtemp <- as.numeric(hilomodified$cms[end[i]+24])
endDO <- as.numeric(hilomodified$cms[end[i]+24])
par(mar = c(5, 4, 4, 4) + 0.3) # Additional space for second y-axis
plot(x = date,y = as.numeric(hilomodified$cms[index]), type = "l",lwd=2, col="black", xlab="Time") # Create first plot for River Flow
par(new = TRUE) # Add new plot
plot(x = date ,y = range01(as.numeric(hilomodified$ChlRFU[index])),axes = FALSE, type = "l",lwd=2, col="blue", ylab = "Time") # Create second plot without axes
lines(x = date ,y = range01(as.numeric(hilomodified$Corr.NTU[index])), type = "l",lwd=2, col="red")
lines(x = date ,y = range01(as.numeric(hilomodified$saltppt[index])), type = "l",lwd=2, col="green")
lines(x = date ,y = range01(as.numeric(hilomodified$TempC[index])), type = "l",lwd=2, col="orange")
lines(x = date ,y = range01(as.numeric(hilomodified$Doper[index])), type = "l",lwd=2, col="purple")
axis(side = 4) # Add second axis
#mtext("y2", side = 4, line = 3) # Add second axis label
text(x= enddate, y = endcms, col = "black", adj=1, cex=0.85, label= "River Flow")
text(x= enddate, y = endchl, col = "blue", adj=1, cex=0.85, label= "CHL")
text(x= enddate, y = endturb, col = "red", adj=1, cex=0.85, label= "TURB")
text(x= enddate, y = endsalt, col = "green", adj=1, cex=0.85, label= "Salinity")
text(x= enddate, y = endtemp, col = "orange", adj=1, cex=0.85, label= "TEMP")
text(x= enddate, y = endDO, col = "purple", adj=1, cex=0.85, label= "DIS")
}